@dataknut)knitr::opts_chunk$set(echo = TRUE)
# Set start time ----
startTime <- proc.time()
# Libraries ----
message("Loading libraries...")
## Loading libraries...
library(here) # where are we?
## here() starts at /Users/ben/github/dataknut/botanicals
library(data.table) # data frames with superpowers
library(ggplot2) # pretty plots
library(kableExtra) # pretty tables
library(lubridate) # date & time stuff
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following object is masked from 'package:here':
##
## here
## The following object is masked from 'package:base':
##
## date
library(plotly) # interactive plots
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(skimr) # descriptive & summary stats
# Parameters ----
dPath <- paste0(here::here(), "/data/")
dFile <- paste0(dPath, "ClimateData_1976_2019.csv")
# get data ----
message("Getting data from ", dFile)
## Getting data from /Users/ben/github/dataknut/botanicals/data/ClimateData_1976_2019.csv
weatherDT <- data.table::fread(dFile) # load data
We want to load some UK weather data and have a look at it. We then want to see if we can:
weatherDT$V1 <- NULL # what's the row number for?
# do some pre-processing ----
weatherDT[, date := lubridate::dmy(Date)] # make nice date
weatherDT[, year := lubridate::year(date)] # make nice date
weatherDT[, month := lubridate::month(date, label = TRUE, abbr = TRUE)] # make nice date
t <- head(weatherDT) # what have we got?
kableExtra::kable(t, caption = "First few rows of processed data") %>%
kable_styling()
| Date | maxT | minT | prcpt | date | year | month |
|---|---|---|---|---|---|---|
| 1/01/76 | 8.0 | 1.1 | 17.3 | 1976-01-01 | 1976 | Jan |
| 2/01/76 | 9.6 | 0.3 | 41.0 | 1976-01-02 | 1976 | Jan |
| 3/01/76 | 10.3 | 1.3 | 26.6 | 1976-01-03 | 1976 | Jan |
| 4/01/76 | 10.6 | 1.4 | 5.3 | 1976-01-04 | 1976 | Jan |
| 5/01/76 | 9.2 | -0.2 | 18.3 | 1976-01-05 | 1976 | Jan |
| 6/01/76 | 8.1 | 4.7 | 4.7 | 1976-01-06 | 1976 | Jan |
skimr::skim(weatherDT)
| Name | weatherDT |
| Number of rows | 16063 |
| Number of columns | 7 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| Date | 1 |
| factor | 1 |
| numeric | 4 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| Date | 0 | 1 | 7 | 8 | 0 | 16063 | 0 |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| date | 0 | 1 | 1976-01-01 | 2019-12-31 | 1997-12-27 | 16063 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| month | 0 | 1 | TRUE | 12 | Jan: 1364, Mar: 1364, May: 1364, Jul: 1364 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| maxT | 0 | 1 | 11.55 | 6.02 | -6.5 | 7.1 | 11.4 | 15.9 | 32.70 | ▁▆▇▃▁ |
| minT | 0 | 1 | 5.13 | 4.95 | -14.0 | 1.4 | 5.2 | 9.0 | 25.46 | ▁▃▇▃▁ |
| prcpt | 0 | 1 | 3.65 | 6.30 | 0.0 | 0.0 | 0.7 | 4.9 | 72.20 | ▇▁▁▁▁ |
| year | 0 | 1 | 1997.49 | 12.70 | 1976.0 | 1986.0 | 1997.0 | 2008.0 | 2019.00 | ▇▇▇▇▇ |
Just for fun let’s plot the data. You should be able to hover the mouse over the dots in 3.1 to investigate.
# just for fun
plotDT <- weatherDT[, .(meanPrcpt = mean(prcpt)),
keyby = .(year, month)]
p <- ggplot2::ggplot(plotDT, aes(x = month, y = meanPrcpt, group = year, colour = year)) +
geom_point() +
scale_color_viridis_c() +
labs(y = "Mean precipitation",
caption = "Year treated as continuous for pretty colours")
plotly::ggplotly(p) # interactive plot
Figure 3.1: Mean precipitation per month by year
So that looks vaguely right.
Count the number of spans (per year?) of 15 or more days when the daily rainfall < 0.2. This is the UK definition of drought.
To do this we:
Table 4.1 shows the number of low precipitation events (1 or more days with < 0.2) which were droughts (> 14 days in a row) by start month and year. Perhaps surprisingly 1976 has none.
setkey(weatherDT,date) # ensures ordered by date
weatherDT[, droughtThresh := ifelse(prcpt < 0.2, 1, 0)] # to make life easier
# flag the start & ends of low precipitation periods
weatherDT[, droughtPeriod := ifelse(droughtThresh == 1 &
shift(droughtThresh == 0), # first of 1 -> n days with low
"Start",
NA)]
weatherDT[, droughtPeriod := ifelse(droughtThresh == 0 &
shift(droughtThresh == 1), # last of 1 -> n days with low
"End",
droughtPeriod)]
# now isolate the start & ends of low precipitation periods
lowPrcptPeriodsDT <- weatherDT[droughtPeriod == "Start" |
droughtPeriod == "End"]
lowPrcptPeriodsDT[, periodCount := ifelse(droughtPeriod == "Start",
shift(date, type = "lead") - date, # n days between start & end (they are ordered)
NA) # undefined between end & start (we don't care about periods between droughts for now)
]
# flag the droughts using the 14 day threshold
lowPrcptPeriodsDT[, drought := ifelse(periodCount > 14,
"Drought",
"Not drought")]
plotDT <- lowPrcptPeriodsDT[, .(nDroughts = .N), # data.table magic
keyby = .(drought, year, start = month)] # add up number of droughts/no droughts in these periods of low precip
kableExtra::kable(plotDT[drought == "Drought"], caption = "Number of low precipitation periods that were droughts by year") %>%
kable_styling()
| drought | year | start | nDroughts |
|---|---|---|---|
| Drought | 1977 | May | 1 |
| Drought | 1980 | May | 1 |
| Drought | 1981 | Aug | 1 |
| Drought | 1984 | Apr | 1 |
| Drought | 1984 | Aug | 1 |
| Drought | 1985 | Oct | 1 |
| Drought | 1989 | Nov | 1 |
| Drought | 1991 | Aug | 1 |
| Drought | 1991 | Nov | 1 |
| Drought | 1992 | Jun | 1 |
| Drought | 1995 | Apr | 1 |
| Drought | 1995 | Jun | 1 |
| Drought | 1995 | Jul | 1 |
| Drought | 1997 | May | 1 |
| Drought | 2000 | May | 1 |
| Drought | 2002 | Sep | 1 |
| Drought | 2003 | Mar | 1 |
| Drought | 2003 | Apr | 1 |
| Drought | 2011 | Apr | 1 |
| Drought | 2013 | Jul | 1 |
| Drought | 2018 | Jun | 1 |
| Drought | 2019 | Apr | 1 |
Just to make this clear, Figure 4.1 shows the pattern of number of droughts by start month and year.
p <- ggplot2::ggplot(plotDT[drought == "Drought"],
aes(x = start, fill = nDroughts, y = year)) +
geom_tile() +
labs(caption = "Note: months without droughts (Jan/Feb & Dec) not shown")
p
Figure 4.1: Number of droughts by start month and year
Of note:
t <- proc.time() - startTime
elapsed <- t[[3]]
Analysis completed in 4.159 seconds ( 0.07 minutes) using knitr in RStudio with R version 3.6.3 (2020-02-29) running on x86_64-apple-darwin15.6.0.
R packages used:
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Catalina 10.15.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_NZ.UTF-8/en_NZ.UTF-8/en_NZ.UTF-8/C/en_NZ.UTF-8/en_NZ.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] skimr_2.1 plotly_4.9.2 lubridate_1.7.4 kableExtra_1.1.0
## [5] ggplot2_3.3.0 data.table_1.12.8 here_0.1
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.0.0 xfun_0.12 repr_1.1.0 purrr_0.3.3
## [5] colorspace_1.4-1 vctrs_0.2.3 htmltools_0.4.0 viridisLite_0.3.0
## [9] yaml_2.2.1 base64enc_0.1-3 rlang_0.4.5 later_1.0.0
## [13] pillar_1.4.3 glue_1.3.1 withr_2.1.2 lifecycle_0.2.0
## [17] stringr_1.4.0 munsell_0.5.0 gtable_0.3.0 rvest_0.3.5
## [21] htmlwidgets_1.5.1 evaluate_0.14 labeling_0.3 knitr_1.28
## [25] fastmap_1.0.1 httpuv_1.5.2 crosstalk_1.0.0 highr_0.8
## [29] Rcpp_1.0.3 xtable_1.8-4 readr_1.3.1 promises_1.1.0
## [33] scales_1.1.0 backports_1.1.5 webshot_0.5.2 jsonlite_1.6.1
## [37] mime_0.9 farver_2.0.3 hms_0.5.3 digest_0.6.25
## [41] stringi_1.4.6 shiny_1.4.0 bookdown_0.18 dplyr_0.8.5
## [45] grid_3.6.3 rprojroot_1.3-2 tools_3.6.3 magrittr_1.5
## [49] lazyeval_0.2.2 tibble_2.1.3 crayon_1.3.4 tidyr_1.0.2
## [53] pkgconfig_2.0.3 xml2_1.2.2 assertthat_0.2.1 rmarkdown_2.1
## [57] httr_1.4.1 rstudioapi_0.11 R6_2.4.1 compiler_3.6.3
Arino de la Rubia, Eduardo, Hao Zhu, Shannon Ellis, Elin Waring, and Michael Quinn. 2017. Skimr: Skimr. https://github.com/ropenscilabs/skimr.
Dowle, M, A Srinivasan, T Short, S Lianoglou with contributions from R Saporta, and E Antonyan. 2015. Data.table: Extension of Data.frame. https://CRAN.R-project.org/package=data.table.
Grolemund, Garrett, and Hadley Wickham. 2011. “Dates and Times Made Easy with lubridate.” Journal of Statistical Software 40 (3): 1–25. http://www.jstatsoft.org/v40/i03/.
Müller, Kirill. 2017. Here: A Simpler Way to Find Your Files. https://CRAN.R-project.org/package=here.
R Core Team. 2016. R: A Language and Environment for Statistical Computing. Vienna, Austria: R Foundation for Statistical Computing. https://www.R-project.org/.
Wickham, Hadley. 2009. Ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. http://ggplot2.org.
Xie, Yihui. 2016. Knitr: A General-Purpose Package for Dynamic Report Generation in R. https://CRAN.R-project.org/package=knitr.
Zhu, Hao. 2018. KableExtra: Construct Complex Table with ’Kable’ and Pipe Syntax. https://CRAN.R-project.org/package=kableExtra.